// Inputs for the moving-average time-series model: the series length and
// the observed values.
data {
  int<lower=1> T; // Number of time points (constrained to be at least 1)
  vector[T] y;      // Observed time series, one entry per time point
}
// Unknown quantities of the MA(2) model.
parameters {
  real mu;          // Mean of the series
  real theta1;      // MA coefficient on the lag-1 error
  real theta2;      // MA coefficient on the lag-2 error
  // The model block places a uniform(0, 5) prior on sigma. The declared
  // bounds must cover exactly the prior's support; with only lower=0 the
  // sampler can propose sigma > 5, where the density is zero, and those
  // proposals (or initial values) are rejected.
  real<lower=0, upper=5> sigma; // Noise standard deviation
}
// MA(2) model: y[t] = mu + theta1 * err[t-1] + theta2 * err[t-2] + err[t],
// with err[t] ~ normal(0, sigma) and errors before t = 1 taken to be zero.
model {
  // Local variables are declared first so the block also compiles with
  // Stan compilers that require declarations at the top of a block.
  vector[T] nu;   // One-step-ahead prediction for each time point
  vector[T] err;  // Residual y[t] - nu[t] for each time point

  // Priors
  mu ~ normal(0, 10);
  theta1 ~ normal(0, 10);
  theta2 ~ normal(0, 10);
  sigma ~ uniform(0, 5);  // Flat on [0, 5]; zero density above 5

  // Initial condition: no past errors are available at t = 1.
  nu[1] = mu;
  err[1] = y[1] - nu[1];

  // Each prediction depends on the previous one or two residuals.
  // The loop is empty when T == 1.
  for (t in 2:T) {
    nu[t] = mu + theta1 * err[t - 1];
    if (t > 2) {
      // The lag-2 error only exists from t = 3 onward.
      nu[t] += theta2 * err[t - 2];
    }
    err[t] = y[t] - nu[t];
  }

  // Likelihood. Stating it on the data, y ~ normal(nu, sigma), gives the
  // same log density as err ~ normal(0, sigma) because err = y - nu, and it
  // avoids putting a computed local variable on the left of `~`, which
  // triggers Stan's Jacobian warning.
  y ~ normal(nu, sigma);
}